1) Importamos las librerías que utilizaremos
In [1]:
import urllib.request
import wfdb
import psycopg2
from psycopg2.extensions import AsIs
2) Leemos el archivo con las WaveForm que vamos a utilizar
In [2]:
target_url = "https://physionet.org/physiobank/database/mimic3wdb/matched/RECORDS-waveforms"
# Download the index of waveform records. urlopen returns a file-like object;
# the context manager closes the HTTP response as soon as the lines are read,
# instead of leaving the connection open for the lifetime of the kernel.
with urllib.request.urlopen(target_url) as data:
    lines = data.readlines()
# Keep the str() rendering of the bytes entry (it looks like "b'...\\n'"):
# the cleanup step that follows strips exactly those artifacts.
line = str(lines[2])
line
Out[2]:
3) Limpiamos los caracteres extraños y dividimos la cadena, cuyo formato es pXXNNNN-YYYY-MM-DD-hh-mm, donde XXNNNN es el identificador único del paciente (SUBJECT_ID) y YYYY-MM-DD-hh-mm es la fecha de la estadía del paciente.
In [3]:
# Remove the artifacts left by str(bytes): the b' prefix, the closing quote
# and the escaped newline (a literal backslash followed by "n").
line = line.replace("b'", '').replace("'", '').replace('\\n', '')
# A record path has three "/"-separated parts: folder, sub-folder, record name.
splited = line.split("/")
carpeta, subCarpeta, onda = splited
carpeta = "/".join((carpeta, subCarpeta))
# The sub-folder is the subject id prefixed with "p".
subject_id = subCarpeta.replace('p', '')
# The record name is "<sub-folder>-<date>"; dropping the prefix leaves the date.
recordDate = onda.replace(subCarpeta + "-", "")
for label, value in (
    ("subject_id: ", subject_id),
    ("recordDate: ", recordDate),
    ("onda: ", onda),
    ("carpeta: ", carpeta),
):
    print(label, value)
4) Leemos el encabezado del waveform, para obtener la información del paciente que almacenaremos
In [4]:
try:
    # Only the first sample is requested (sampto=1): this cell is after the
    # record's header fields, not the signal itself.
    sig, fields = wfdb.srdsamp(onda, pbdir='mimic3wdb/matched/'+carpeta, sampto=1)
    print(fields)
except Exception:
    print("onda vacia")
    # Re-raise instead of swallowing the error: otherwise `fields` is either
    # undefined (fresh kernel) or still holds the metadata of a previously
    # read record, and the following cells would store it under the wrong
    # subject_id / recordDate.
    raise
Le agregamos el subject_id y la fecha del record a los campos
In [5]:
# Attach the identifiers parsed from the record path to the header fields,
# so they are stored together with the rest of the metadata.
fields.update(subject_id=subject_id, recordDate=recordDate)
Extraemos los nombres de las columnas y sus valores a partir del diccionario de campos
In [6]:
# Column names are the keys of the fields mapping; the values are collected
# in the same order so both sequences line up position by position.
columns = fields.keys()
values = list(map(fields.__getitem__, columns))
print(columns)
Nos conectamos a la base de datos postgres donde almacenaremos los datos
In [7]:
# Open a connection using a libpq connection string that names only the
# database; every other connection setting is left to the driver defaults.
conn = psycopg2.connect(dsn="dbname=mimic")
# All statements in the following cells are issued through this cursor.
cur = conn.cursor()
Creamos la tabla donde quedarán almacenados los datos
In [8]:
table = "waveformFields"
# To rebuild the table from scratch, uncomment the next line:
#cur.execute("DROP TABLE "+table)
# Create the destination table only if it is missing, so re-running this
# cell is harmless. The statement is assembled from its fragments in order.
cur.execute("".join([
    "CREATE TABLE IF NOT EXISTS ", table,
    " (id serial PRIMARY KEY,",
    "comments character varying(255)[],",
    "fs integer, signame character varying(255)[],",
    "units character varying(255)[],",
    "subject_id integer,",
    "recordDate character varying(255));",
]))
Verificamos si ya existe el dato
In [9]:
def track_not_exists(cur, subject_id,recordDate):
    """Return True when no stored row matches the subject and record date.

    Args:
        cur: Cursor-like object providing ``execute(sql, params)`` and
            ``fetchone()``.
        subject_id: Subject identifier; converted with ``int()`` before
            being bound to the query.
        recordDate: Value compared against the ``recorddate`` column.

    Returns:
        bool: True if the lookup returned no row, False otherwise.

    The table name is read from the module-level ``table`` variable.
    """
    query = 'select id from '+table+' where subject_id= %s and recorddate = %s'
    params = (int(subject_id), recordDate)
    cur.execute(query, params)
    row = cur.fetchone()
    return row is None
# Debugging aid: uncomment to see the lookup query with its parameters bound.
#print(cur.mogrify(select_stament,(int(subject_id),recordDate)))
# Remember whether the record is still missing; the insert step is gated on
# this flag.
notExist = bool(track_not_exists(cur, subject_id, recordDate))
if notExist:
    print("not exist")
Insertamos los datos
In [10]:
insert_statement = 'insert into '+table+' (%s) values %s'
# The column list is spliced in verbatim through AsIs (it must not be quoted
# as a string literal); the values travel as a single tuple parameter.
column_list = AsIs(','.join(columns))
row_values = tuple(values)
# Show the statement exactly as it would be sent to the server.
print(cur.mogrify(insert_statement, (column_list, row_values)))
# Insert only when the existence check found no matching row.
if notExist:
    cur.execute(insert_statement, (column_list, row_values))
Hacemos commit
In [11]:
# Commit the open transaction so the statements issued through the cursor
# above are made permanent.
conn.commit()
Cerramos la conexión
In [12]:
# Close the database connection now that the work has been committed.
conn.close()